"""
使用lxml库解析html: 豆瓣电影内容爬取示例
"""
import requests
from lxml import etree

# Page scraped by this example: the Douban movie chart.
douban_movie_url = 'https://movie.douban.com/chart'

# Browser-like User-Agent string sent along with the request.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'
headers = {'User-agent': user_agent}

# requests never times out by default, so pass an explicit timeout (seconds)
# to keep a stalled connection from hanging the script forever.
with requests.get(douban_movie_url, headers=headers, timeout=10) as response:
    # Fail loudly on 4xx/5xx responses instead of parsing an error page and
    # silently printing empty result lists.
    response.raise_for_status()
    html_str = response.text

# Only the decoded text is needed from here on, so parsing happens after the
# connection has been released by the `with` block.

# Parse the HTML text into an element tree with lxml's etree.
html = etree.HTML(html_str)

# Collect the detail-page URL of every movie listed in the chart.
movie_url_list = html.xpath("//div[@class='indent']/div/table//div[@class='pl2']/a/@href")
print(movie_url_list)

# Collect the poster image URL of every movie listed in the chart.
image_url_list = html.xpath("//div[@class='indent']/div/table//a[@class='nbg']/img/@src")
print(image_url_list)
